﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace vietnameseprocessing.Utils
{
    /// <summary>
    /// Holder for the regular-expression pattern strings used by the text
    /// processing code. The fields are plain pattern strings, not compiled
    /// <c>Regex</c> instances; callers choose their own <c>RegexOptions</c>.
    /// </summary>
    class RegexPatterns
    {
        /// <summary>Matches a single word character (<c>\w</c>).</summary>
        public static readonly string REGEX_TU = @"\w";

        /// <summary>
        /// Matches one punctuation character from the listed set, optionally
        /// followed by a single space.
        /// </summary>
        public static readonly string DAUCAU = "[.,;“”:()!?'`\\\"/_] ?";

        /// <summary>
        /// Syllable pattern built from ASCII letters only: an optional initial
        /// consonant cluster, a vowel group, and an optional final consonant.
        /// NOTE(review): "TVKD" presumably stands for Vietnamese without
        /// diacritics - confirm against the callers.
        /// </summary>
        public static readonly string patternTVKD = @"(r|d|gi|v|ch|tr|s|x|l|n|qu|b|c|k|g|gh|h|kh|m|ng|ngh|nh|p|ph|t|th)?(u|e|o|a|i|y|ie|uo|ai|au|ua|ia|ui|eu|oi|ay|uy|ou|ua|oa|ao|eo|ue|uya|uoi|uye|ieu|oai|oay|uoi|uou)(c|ch|m|n|ng|nh|p|t)?";

        /// <summary>
        /// Word-bounded syllable pattern whose vowel group also lists accented
        /// vowels. The optional initial consonant is anchored with <c>^</c> and
        /// the optional final consonant with <c>$</c>, so (without
        /// <c>RegexOptions.Multiline</c>) they only participate at the very
        /// start / end of the input string.
        /// NOTE(review): "TVCD" presumably stands for Vietnamese with
        /// diacritics - confirm against the callers.
        /// </summary>
        public static readonly string patternTVCD = @"\b(^(r|d|đ|gi|v|ch|tr|s|x|l|n|qu|b|c|k|g|gh|h|kh|m|ng|ngh|nh|p|ph|t|th))?(u|e|o|a|i|y|ie|uo|ai|au|ua|ia|ui|eu|oi|ay|uy|ou|ua|oa|ao|eo|ue|uya|uoi|uye|ieu|oai|oay|uoi|uou|a|á|à|ả|ã|ạ|ă|ắ|ằ|ẳ|ẵ|ặ|â|ấ|ầ|ẩ|ẫ|ậ|i|í|ì|ỉ|ĩ|ị|y|ý|ỳ|ỷ|ỹ|ỵ|u|ú|ù|ủ|ũ|ụ|ư|ứ|ừ|ử|ữ|ự|e|ô|ố|ồ|ổ|ỗ|ộ|ơ|ớ|ờ|ở|ỡ|ợ|iê|iế|iề|iể|iễ|iệ|ươ|ướ|ườ|ưở|ưỡ|ượ|oe|óe|òe|ỏe|ọe|ai|ái|ài|ải|ãi|ại|au|áu|àu|ảu|ãu|ạu|ua|úa|ùa|ủa|ũa|ụa|ưa|ứa|ừa|ửa|ữa|ựa|âu|ấu|ầu|ẩu|ẫu|ậu|uâ|uấ|uầ|uẩ|uẫ|uậ|ia|ía|ìa|ỉa|ĩa|ịa|ui|úi|ùi|ủi|ũi|ụi|ưi|ứi|ừi|ửi|ữi|ựi|iu|íu|ìu|ỉu|ĩu|ịu|êu|ếu|ều|ểu|ễu|ệu|oi|ói|òi|ỏi|õi|ọi|ôi|ối|ồi|ổi|ỗi|ội|ơi|ới|ời|ởi|ỡi|ợi|ay|áy|ày|ảy|ãy|ạy|ây|ấy|ầy|ẩy|ẫy|ậy|uy|úy|ùy|ủy|ũy|ụy|uô|uố|uồ|uổ|uỗ|uộ|uâ|uấ|uầ|uẩ|uẫ|uậ|oa|óa|òa|ỏa|õa|ọa|oă|oắ|oằ|oẳ|oẵ|oặ|ao|áo|ào|ảo|ão|ạo|eo|éo|èo|ẻo|ẽo|ẹo|uê|uế|uề|uể|uễ|uệ|uya|uýa|uỳa|uỷa|uỹa|uỵa|ươi|ưới|ười|ưởi|ưỡi|ượi|uyê|uyế|uyề|uyể|uyễ|uyệ|iêu|iếu|iều|iểu|iễu|iệu|oai|oái|oài|oải|oãi|oại|oay|oáy|oày|oảy|oãy|oạy|uây|uấy|uầy|uẩy|uẫy|uậy|uôi|uối|uồi|uổi|uỗi|uội|ươu|ướu|ườu|ưởu|ưỡu|ượu)((c|ch|m|n|ng|nh|u|p|t)$)?\b";

        /// <summary>
        /// Matches spans to strip during pre-processing: bracketed spans
        /// (<c>{...}</c>, <c>[...]</c>, <c>&lt;...&gt;</c>; capture group 1),
        /// programming-language keywords, a few named HTML entities, and
        /// numeric character references with any word characters attached.
        /// </summary>
        /// <remarks>
        /// The keyword alternative used to be wrapped in JavaScript regex
        /// literal syntax (<c>/.../gi</c>). In .NET those slashes and the
        /// trailing <c>gi</c> are literal characters, so bare keywords were
        /// never matched. The delimiters are removed and the <c>i</c> flag is
        /// expressed as a scoped <c>(?i:...)</c> group, which is non-capturing
        /// and therefore keeps capture-group numbering unchanged.
        /// NOTE(review): short keywords such as <c>do</c>, <c>long</c> or
        /// <c>as</c> may also be ordinary words in the processed text - confirm
        /// that stripping them is desired.
        /// </remarks>
        public static readonly string patternPreProcessing =
            // Bracketed spans, lazily matched up to the first closing bracket.
            @"({.*?}|\[.*?\]|<.*?>)"
            // Whole-word keywords, case-insensitive, optionally followed by "(".
            + @"|(?i:\b(and|array|as|b(ool(ean)?|reak)|c(ase|atch|har|lass|on(st|tinue))|d(ef|elete|o(uble)?)|e(cho|lse(if)?|xit|xtends|xcept)|f(inally|loat|or(each)?|unction)|global|if|import|int(eger)?|long|new|object|or|pr(int|ivate|otected)|public|return|self|st(ring|ruct|atic)|switch|th(en|is|row)|try|(un)?signed|var|void|while)(?=\(|\b))"
            // Named HTML entities (written without the trailing ';').
            + @"|&nbsp|&quot|&gt"
            // Numeric character references such as "&#1234;" plus adjacent word characters.
            + @"|\b(\w+)?(&#\d+;)((\w+)?)\b";
    }
}
